실시간 코인 가격
<!DOCTYPE html>
비트코인 예측모델 상세
모델 정보 : v1.0.1
타겟 : bitcoin
문제 : regression
데이터 : poloniex
데이터 학습 기간 : 2018년 1월 ~ 2019년 2월
프로그램 : Python
패키지 : sklearn
알고리즘 : RF, GBM, eXtra Trees, Bayesian Ridge, Elastic Net
기타 : 작업 중 모델
In [1]:
import numpy as np
import os
import pandas as pd
import urllib.request
import datetime as dt
import statsmodels.api as sm
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=False)
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, ExtraTreesRegressor
from sklearn.linear_model import BayesianRidge, ElasticNetCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
def GetAPIUrl(cur, sts=1420070400, period=7200):
    '''
    Build the Poloniex returnChartData URL for a USDT-quoted currency pair.

    cur: ticker abbreviation of the cryptocurrency (BTC, LTC, etc)
    sts: start of the requested range as an integer Unix timestamp in seconds
        (the default, 1420070400, is 2015-01-01 00:00:00 UTC)
    period: candle width in seconds as an integer; the default 7200 requests
        2-hour candles, so one returned row is NOT one day unless 86400 is
        passed

    Returns the request URL as a string.
    '''
    # The separator and parameter name must be the literal "&currencyPair=".
    # The earlier text held a currency-sign character (U+00A4) in its place,
    # the result of "&curren" being decoded as an HTML entity, which produced
    # a query string without the currencyPair parameter.
    return (
        'https://poloniex.com/public?command=returnChartData'
        '&currencyPair=USDT_{:s}&start={:d}&end=9999999999&period={:d}'
    ).format(cur, sts, period)
def GetCurDF(cur):
    '''
    Download the chart data for a USDT-quoted pair and load it into a DataFrame.

    cur: ticker abbreviation of the cryptocurrency (BTC, LTC, etc)

    Returns the DataFrame that pandas.read_json builds from the response body.
    Errors raised by urllib while opening or reading the URL propagate to the
    caller.
    '''
    from io import StringIO

    # The context manager closes the connection even when read() raises.
    with urllib.request.urlopen(GetAPIUrl(cur)) as response:
        payload = response.read()
    # Hand pandas a file-like object: recent pandas releases deprecate passing
    # a literal JSON string to read_json.
    return pd.read_json(StringIO(payload.decode()))
# Tickers handled by this notebook. Only Bitcoin is active; LTC, ETH and XMR
# are the other candidates.
cl = ['BTC']
# Price-data columns to use.
CN = [
    'close',
    'high',
    'low',
    'open',
    'volume',
]
In [2]:
# Fetch the USDT_BTC chart data and preview its last rows.
data = GetCurDF(cur='BTC')
data.tail()
Out[2]:
In [3]:
# Show the earliest and latest timestamps in the download.
# The aggregations are named by string because recent pandas releases
# deprecate passing the builtin min/max callables to agg.
print(data['date'].agg(['min', 'max']))
In [4]:
# Keep only the rows whose calendar day (first 10 characters of the timestamp
# text) sorts after '2018-01-01', i.e. rows from 2018-01-02 onwards.
# The day strings are compared directly, so no scratch column has to be added
# to the frame and deleted again.
data = data[data['date'].astype(str).str[:10] > '2018-01-01'].copy()
# String aggregation names avoid the deprecated builtin-callable form of agg.
print(data['date'].agg(['min', 'max']))
In [5]:
# Derived columns.
# Close minus open for each row.
data['oc_diff'] = data['close'].sub(data['open'])
# Mean of the four open/high/low/close prices of each row.
data['daily_avg'] = (
    data['open'].add(data['high']).add(data['low']).add(data['close'])
).div(4)
# Target column: the average price 30 rows ahead (the final 30 rows get NaN).
data['daily_avg_After_Month'] = data['daily_avg'].shift(periods=-30)
In [6]:
# Use the timestamps as the row index; the 'date' column leaves the frame.
data = data.set_index('date')
In [7]:
# Bitcoin (BTC): build the feature/target tables for training and forecasting.
BTC = data.copy()
BTC['daily_avg_After_Month'] = BTC['daily_avg'].shift(periods=-30)
# Rows with a known target: features are every column except the target and
# the average price it is derived from.
X_BTC = BTC.dropna().drop(columns=['daily_avg_After_Month', 'daily_avg'])
y_BTC = BTC.dropna().loc[:, 'daily_avg_After_Month']
# NOTE(review): the split uses train_test_split's default shuffling, which
# mixes earlier and later rows of a time-ordered table - confirm intended.
(X_train_BTC, X_test_BTC,
 y_train_BTC, y_test_BTC) = train_test_split(
    X_BTC, y_BTC, random_state=43, test_size=0.2)
# Features of the last 30 rows, whose target is still unknown.
X_forecast_BTC = BTC.iloc[-30:].drop(columns=['daily_avg_After_Month', 'daily_avg'])
In [8]:
# define regression function
def regression(X_train, X_test, y_train, y_test):
    '''
    Fit a fixed set of regressors and print each one's test-set metrics.

    X_train, y_train: features and target used to fit every model
    X_test, y_test: features and target used to score every model

    Prints the model name followed by R2, MAE and MSE (two decimals each).
    Returns None.
    '''
    regressors = {
        'Random Forest Regressor': RandomForestRegressor(n_estimators=200),
        'Gradient Boosting Regressor': GradientBoostingRegressor(n_estimators=500),
        'ExtraTrees Regressor': ExtraTreesRegressor(n_estimators=500, min_samples_split=5),
        'Bayesian Ridge': BayesianRidge(),
        'Elastic Net CV': ElasticNetCV(cv=3),
    }
    for name, model in regressors.items():
        print(name)
        model.fit(X_train, y_train)
        # Predict once and reuse the result for all three metrics instead of
        # re-running the fitted model for every print.
        y_pred = model.predict(X_test)
        print(f'R2: {r2_score(y_test, y_pred):.2f}')
        print(f'MAE: {mean_absolute_error(y_test, y_pred):.2f}')
        print(f'MSE: {mean_squared_error(y_test, y_pred):.2f}')
In [9]:
# Score every candidate regressor on the Bitcoin train/test split.
print('Bitcoin (BTC):')
regression(
    X_train=X_train_BTC,
    X_test=X_test_BTC,
    y_train=y_train_BTC,
    y_test=y_test_BTC,
)
In [10]:
# define prediction function
def prediction(name, X, y, X_forecast):
    '''
    Fit the regressor chosen for a ticker and predict for new feature rows.

    name: ticker; 'XRP' and 'LTC' use a random forest, every other value
        uses extra trees
    X, y: features and target the model is fitted on
    X_forecast: feature rows to predict for

    Returns the model's predictions for X_forecast.
    '''
    if name not in ('XRP', 'LTC'):
        estimator = ExtraTreesRegressor(n_estimators=500, min_samples_split=5)
    else:
        estimator = RandomForestRegressor(n_estimators=200)
    # fit() returns the fitted estimator, so the calls can be chained.
    return estimator.fit(X, y).predict(X_forecast)
In [11]:
# Fit on all rows with a known target and predict for the last 30 rows.
forecasted_BTC = prediction(
    name='BTC',
    X=X_BTC,
    y=y_BTC,
    X_forecast=X_forecast_BTC,
)
In [12]:
# Build a 30-entry daily index that starts one day after the last historical row.
# NOTE(review): this labels the 30 predictions as consecutive calendar days;
# confirm that this matches the spacing of the rows in `data`.
last_date = data.index[-1]
modified_date = last_date + dt.timedelta(1)
new_date = pd.date_range(start=modified_date, periods=30, freq='D')
# Attach the predictions to that index under the same column name as the history.
forecasted_BTC = pd.DataFrame({'daily_avg': forecasted_BTC}, index=new_date)
# Stack the historical averages and the predicted ones into one frame.
bitcoin = pd.concat([data.loc[:, ['daily_avg']], forecasted_BTC], axis=0)
In [13]:
# Output: the combined series from 2019-01-02 onwards, as an independent copy.
output = bitcoin.loc['2019-01-02':].copy()
output.head()
Out[13]:
In [14]:
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Historical trace: every row of `output` except the last 30.
Y = go.Scatter(
    x=output.index[:-30],
    y=output[:-30]['daily_avg'],
    name="Historical Price",
    line=dict(color='blue', width=1),
    opacity=0.8,
)
# Predicted trace: the last 31 rows, i.e. the final 30 rows plus the row just
# before them (presumably so the two lines join on the chart).
Yhat = go.Scatter(
    x=output.index[-31:],
    y=output[-31:]['daily_avg'],
    name="Predicted Price",
    line=dict(color='red', dash='dot', width=1),
    opacity=0.8,
)
# The trace list is deliberately not called `data`: that name holds the price
# DataFrame used by the earlier cells, and rebinding it here would make those
# cells fail when re-run.
traces = [Y, Yhat]
layout = dict(title='2019년 3월 비트코인 예측', xaxis={'tickformat': '%Y-%m-%d'})
fig = dict(data=traces, layout=layout)
# Write the chart to a standalone HTML file.
plot(fig, filename="2019-02-23-bitcoin-prediction-v101-plot.html")
Out[14]: